#-------------------------------------------------------------------------------
#
#  Define classes for (uni/multi)-variate kernel density estimation.
#
#  Currently, only Gaussian kernels are implemented.
#
#  Written by: Robert Kern
#
#  Date: 2004-08-09
#
#  Modified: 2005-02-10 by Robert Kern.
#			  Contributed to Scipy
#			2005-10-07 by Robert Kern.
#			  Some fixes to match the new scipy_core
#
#  Copyright 2004-2005 by Enthought, Inc.
#
#-------------------------------------------------------------------------------

from __future__ import division, print_function, absolute_import

# Standard library imports.
import warnings

# Scipy imports.
from scipy.lib.six import callable, string_types
from scipy import linalg, special

from numpy import atleast_2d, reshape, zeros, newaxis, dot, exp, pi, sqrt, \
	 ravel, power, atleast_1d, squeeze, sum, transpose, multiply
import numpy as np
from numpy.random import randint, multivariate_normal

# Local imports.
from scipy.stats import mvn

# Addtional imports
from pymc.distributions import rcategorical

__all__ = ['gaussian_kde']


class gaussian_kde(object):
	"""Representation of a kernel-density estimate using Gaussian kernels.

	Kernel density estimation is a way to estimate the probability density
	function (PDF) of a random variable in a non-parametric way.
	`gaussian_kde` works for both uni-variate and multi-variate data.   It
	includes automatic bandwidth determination.  The estimation works best for
	a unimodal distribution; bimodal or multi-modal distributions tend to be
	oversmoothed.

	Parameters
	----------
	dataset : array_like
		Datapoints to estimate from. In case of univariate data this is a 1-D
		array, otherwise a 2-D array with shape (# of dims, # of data).
	bw_method : str, scalar or callable, optional
		The method used to calculate the estimator bandwidth.  This can be
		'scott', 'silverman', a scalar constant or a callable.  If a scalar,
		this will be used directly as `kde.factor`.  If a callable, it should
		take a `gaussian_kde` instance as only parameter and return a scalar.
		If None (default), 'scott' is used.  See Notes for more details.

	Attributes
	----------
	dataset : ndarray
		The dataset with which `gaussian_kde` was initialized.
	d : int
		Number of dimensions.
	n : int
		Number of datapoints.
	factor : float
		The bandwidth factor, obtained from `kde.covariance_factor`, with which
		the covariance matrix is multiplied.
	covariance : ndarray
		The covariance matrix of `dataset`, scaled by the calculated bandwidth
		(`kde.factor`).
	inv_cov : ndarray
		The inverse of `covariance`.

	Methods
	-------
	kde.evaluate(points) : ndarray
		Evaluate the estimated pdf on a provided set of points.
	kde(points) : ndarray
		Same as kde.evaluate(points)
	kde.integrate_gaussian(mean, cov) : float
		Multiply pdf with a specified Gaussian and integrate over the whole
		domain.
	kde.integrate_box_1d(low, high) : float
		Integrate pdf (1D only) between two bounds.
	kde.integrate_box(low_bounds, high_bounds) : float
		Integrate pdf over a rectangular space between low_bounds and
		high_bounds.
	kde.integrate_kde(other_kde) : float
		Integrate two kernel density estimates multiplied together.
	kde.resample(size=None) : ndarray
		Randomly sample a dataset from the estimated pdf.
	kde.set_bandwidth(bw_method='scott') : None
		Computes the bandwidth, i.e. the coefficient that multiplies the data
		covariance matrix to obtain the kernel covariance matrix.
		.. versionadded:: 0.11.0
	kde.covariance_factor : float
		Computes the coefficient (`kde.factor`) that multiplies the data
		covariance matrix to obtain the kernel covariance matrix.
		The default is `scotts_factor`.  A subclass can overwrite this method
		to provide a different method, or set it through a call to
		`kde.set_bandwidth`.


	Notes
	-----
	Bandwidth selection strongly influences the estimate obtained from the KDE
	(much more so than the actual shape of the kernel).  Bandwidth selection
	can be done by a "rule of thumb", by cross-validation, by "plug-in
	methods" or by other means; see [3]_, [4]_ for reviews.  `gaussian_kde`
	uses a rule of thumb, the default is Scott's Rule.

	Scott's Rule [1]_, implemented as `scotts_factor`, is::

		n**(-1./(d+4)),

	with ``n`` the number of data points and ``d`` the number of dimensions.
	Silverman's Rule [2]_, implemented as `silverman_factor`, is::

		n * (d + 2) / 4.)**(-1. / (d + 4)).

	Good general descriptions of kernel density estimation can be found in [1]_
	and [2]_, the mathematics for this multi-dimensional implementation can be
	found in [1]_.

	References
	----------
	.. [1] D.W. Scott, "Multivariate Density Estimation: Theory, Practice, and
		   Visualization", John Wiley & Sons, New York, Chicester, 1992.
	.. [2] B.W. Silverman, "Density Estimation for Statistics and Data
		   Analysis", Vol. 26, Monographs on Statistics and Applied Probability,
		   Chapman and Hall, London, 1986.
	.. [3] B.A. Turlach, "Bandwidth Selection in Kernel Density Estimation: A
		   Review", CORE and Institut de Statistique, Vol. 19, pp. 1-33, 1993.
	.. [4] D.M. Bashtannyk and R.J. Hyndman, "Bandwidth selection for kernel
		   conditional density estimation", Computational Statistics & Data
		   Analysis, Vol. 36, pp. 279-298, 2001.

	Examples
	--------
	Generate some random two-dimensional data:

	>>> from scipy import stats
	>>> def measure(n):
	>>>	 "Measurement model, return two coupled measurements."
	>>>	 m1 = np.random.normal(size=n)
	>>>	 m2 = np.random.normal(scale=0.5, size=n)
	>>>	 return m1+m2, m1-m2

	>>> m1, m2 = measure(2000)
	>>> xmin = m1.min()
	>>> xmax = m1.max()
	>>> ymin = m2.min()
	>>> ymax = m2.max()

	Perform a kernel density estimate on the data:

	>>> X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
	>>> positions = np.vstack([X.ravel(), Y.ravel()])
	>>> values = np.vstack([m1, m2])
	>>> kernel = stats.gaussian_kde(values)
	>>> Z = np.reshape(kernel(positions).T, X.shape)

	Plot the results:

	>>> import matplotlib.pyplot as plt
	>>> fig = plt.figure()
	>>> ax = fig.add_subplot(111)
	>>> ax.imshow(np.rot90(Z), cmap=plt.cm.gist_earth_r,
	...		   extent=[xmin, xmax, ymin, ymax])
	>>> ax.plot(m1, m2, 'k.', markersize=2)
	>>> ax.set_xlim([xmin, xmax])
	>>> ax.set_ylim([ymin, ymax])
	>>> plt.show()

	"""
	def __init__(self, dataset, weights=None, bw_method=None):
		self.dataset = atleast_2d(dataset)
		if not self.dataset.size > 1:
			raise ValueError("`dataset` input should have multiple elements.")

		self.d, self.n = self.dataset.shape
		self.set_bandwidth(bw_method=bw_method)
		
		if weights is None:
			self.weights = np.ones(self.n)*(1/self.n)
		else:
			self.weights = np.array(weights)

	def evaluate(self, points):
		"""Evaluate the estimated pdf on a set of points.

		Parameters
		----------
		points : (# of dimensions, # of points)-array
			Alternatively, a (# of dimensions,) vector can be passed in and
			treated as a single point.

		Returns
		-------
		values : (# of points,)-array
			The values at each point.

		Raises
		------
		ValueError : if the dimensionality of the input points is different than
					 the dimensionality of the KDE.

		"""
		points = atleast_2d(points)

		d, m = points.shape
		if d != self.d:
			if d == 1 and m == self.d:
				# points was passed in as a row vector
				points = reshape(points, (self.d, 1))
				m = 1
			else:
				msg = "points have dimension %s, dataset has dimension %s" % (d,
					self.d)
				raise ValueError(msg)

		result = zeros((m,), dtype=np.float)

		# Siyuan: Add weight to gaussians
		# loop over points
		for i in range(m):
			diff = self.dataset - points[:, i, newaxis]
			tdiff = dot(self.inv_cov, diff)
			energy = sum(multiply(diff, tdiff), axis=0) / 2.0
			result[i] = sum(self.weights*exp(-energy), axis=0)

		result = result / (self._norm_factor)
		
		return result

	__call__ = evaluate

	def integrate_gaussian(self, mean, cov):
		"""
		Multiply estimated density by a multivariate Gaussian and integrate
		over the whole space.

		Parameters
		----------
		mean : aray_like
			A 1-D array, specifying the mean of the Gaussian.
		cov : array_like
			A 2-D array, specifying the covariance matrix of the Gaussian.

		Returns
		-------
		result : scalar
			The value of the integral.

		Raises
		------
		ValueError :
			If the mean or covariance of the input Gaussian differs from
			the KDE's dimensionality.

		"""
		mean = atleast_1d(squeeze(mean))
		cov = atleast_2d(cov)

		if mean.shape != (self.d,):
			raise ValueError("mean does not have dimension %s" % self.d)
		if cov.shape != (self.d, self.d):
			raise ValueError("covariance does not have dimension %s" % self.d)

		# make mean a column vector
		mean = mean[:, newaxis]

		sum_cov = self.covariance + cov

		diff = self.dataset - mean
		tdiff = dot(linalg.inv(sum_cov), diff)

		energies = sum(diff * tdiff, axis=0) / 2.0
		result = sum(exp(-energies), axis=0) / sqrt(linalg.det(2 * pi *
														sum_cov)) / self.n

		return result

	def integrate_box_1d(self, low, high):
		"""
		Computes the integral of a 1D pdf between two bounds.

		Parameters
		----------
		low : scalar
			Lower bound of integration.
		high : scalar
			Upper bound of integration.

		Returns
		-------
		value : scalar
			The result of the integral.

		Raises
		------
		ValueError
			If the KDE is over more than one dimension.

		"""
		if self.d != 1:
			raise ValueError("integrate_box_1d() only handles 1D pdfs")

		stdev = ravel(sqrt(self.covariance))[0]

		normalized_low = ravel((low - self.dataset) / stdev)
		normalized_high = ravel((high - self.dataset) / stdev)

		value = np.mean(special.ndtr(normalized_high) -
						special.ndtr(normalized_low))
		return value

	def integrate_box(self, low_bounds, high_bounds, maxpts=None):
		"""Computes the integral of a pdf over a rectangular interval.

		Parameters
		----------
		low_bounds : array_like
			A 1-D array containing the lower bounds of integration.
		high_bounds : array_like
			A 1-D array containing the upper bounds of integration.
		maxpts : int, optional
			The maximum number of points to use for integration.

		Returns
		-------
		value : scalar
			The result of the integral.

		"""
		if maxpts is not None:
			extra_kwds = {'maxpts': maxpts}
		else:
			extra_kwds = {}

		value, inform = mvn.mvnun(low_bounds, high_bounds, self.dataset,
								  self.covariance, **extra_kwds)
		if inform:
			msg = ('An integral in mvn.mvnun requires more points than %s' %
				   (self.d * 1000))
			warnings.warn(msg)

		return value

	def integrate_kde(self, other):
		"""
		Computes the integral of the product of this  kernel density estimate
		with another.

		Parameters
		----------
		other : gaussian_kde instance
			The other kde.

		Returns
		-------
		value : scalar
			The result of the integral.

		Raises
		------
		ValueError
			If the KDEs have different dimensionality.

		"""
		if other.d != self.d:
			raise ValueError("KDEs are not the same dimensionality")

		# we want to iterate over the smallest number of points
		if other.n < self.n:
			small = other
			large = self
		else:
			small = self
			large = other

		sum_cov = small.covariance + large.covariance
		result = 0.0
		for i in range(small.n):
			mean = small.dataset[:, i, newaxis]
			diff = large.dataset - mean
			tdiff = dot(linalg.inv(sum_cov), diff)

			energies = sum(diff * tdiff, axis=0) / 2.0
			result += sum(exp(-energies), axis=0)

		result /= sqrt(linalg.det(2 * pi * sum_cov)) * large.n * small.n

		return result

	def resample(self, size=None):
		"""
		Randomly sample a dataset from the estimated pdf.

		Parameters
		----------
		size : int, optional
			The number of samples to draw.  If not provided, then the size is
			the same as the underlying dataset.

		Returns
		-------
		resample : (self.d, `size`) ndarray
			The sampled dataset.

		"""
		if size is None:
			size = self.n

		norm = transpose(multivariate_normal(zeros((self.d,), float),
						 self.covariance, size=size))
		
		# Siyuan: New sampling method using weighted data
		#indices = randint(0, self.n, size=size)
		indices = []
		for i in range(size):
			indices.append(rcategorical(self.weights))
		means = self.dataset[:, indices]

		return means + norm

	def scotts_factor(self):
		return power(self.n, -1./(self.d+4))

	def silverman_factor(self):
		return power(self.n*(self.d+2.0)/4.0, -1./(self.d+4))

	#  Default method to calculate bandwidth, can be overwritten by subclass
	covariance_factor = scotts_factor

	def set_bandwidth(self, bw_method=None):
		"""Compute the estimator bandwidth with given method.

		The new bandwidth calculated after a call to `set_bandwidth` is used
		for subsequent evaluations of the estimated density.

		Parameters
		----------
		bw_method : str, scalar or callable, optional
			The method used to calculate the estimator bandwidth.  This can be
			'scott', 'silverman', a scalar constant or a callable.  If a
			scalar, this will be used directly as `kde.factor`.  If a callable,
			it should take a `gaussian_kde` instance as only parameter and
			return a scalar.  If None (default), nothing happens; the current
			`kde.covariance_factor` method is kept.

		Notes
		-----
		.. versionadded:: 0.11

		Examples
		--------
		>>> x1 = np.array([-7, -5, 1, 4, 5.])
		>>> kde = stats.gaussian_kde(x1)
		>>> xs = np.linspace(-10, 10, num=50)
		>>> y1 = kde(xs)
		>>> kde.set_bandwidth(bw_method='silverman')
		>>> y2 = kde(xs)
		>>> kde.set_bandwidth(bw_method=kde.factor / 3.)
		>>> y3 = kde(xs)

		>>> fig = plt.figure()
		>>> ax = fig.add_subplot(111)
		>>> ax.plot(x1, np.ones(x1.shape) / (4. * x1.size), 'bo',
		...		 label='Data points (rescaled)')
		>>> ax.plot(xs, y1, label='Scott (default)')
		>>> ax.plot(xs, y2, label='Silverman')
		>>> ax.plot(xs, y3, label='Const (1/3 * Silverman)')
		>>> ax.legend()
		>>> plt.show()

		"""
		if bw_method is None:
			pass
		elif bw_method == 'scott':
			self.covariance_factor = self.scotts_factor
		elif bw_method == 'silverman':
			self.covariance_factor = self.silverman_factor
		elif np.isscalar(bw_method) and not isinstance(bw_method, string_types):
			self._bw_method = 'use constant'
			self.covariance_factor = lambda: bw_method
		elif callable(bw_method):
			self._bw_method = bw_method
			self.covariance_factor = lambda: self._bw_method(self)
		else:
			msg = "`bw_method` should be 'scott', 'silverman', a scalar " \
				  "or a callable."
			raise ValueError(msg)

		self._compute_covariance()

	def _compute_covariance(self):
		"""Computes the covariance matrix for each Gaussian kernel using
		covariance_factor().
		"""
		self.factor = self.covariance_factor()
		# Cache covariance and inverse covariance of the data
		if not hasattr(self, '_data_inv_cov'):
			self._data_covariance = atleast_2d(np.cov(self.dataset, rowvar=1,
											   bias=False))
			self._data_inv_cov = linalg.inv(self._data_covariance)

		self.covariance = self._data_covariance * self.factor**2
		self.inv_cov = self._data_inv_cov / self.factor**2

		# Siyuan: normalize factor for each Gaussian (remove the division of n)
# 		self._norm_factor = sqrt(linalg.det(2*pi*self.covariance)) * self.n
		self._norm_factor = sqrt(linalg.det(2*pi*self.covariance))
